Import all the necessary libraries.
from tensorflow import keras
from tensorflow.keras import layers
import pathlib
from tensorflow.keras.utils import image_dataset_from_directory
import pandas as pd
import pathlib
from pathlib import Path
import numpy as np
import pandas as pd
# plotting modules
from matplotlib import pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
import plotly as plotly
plotly.offline.init_notebook_mode()
from tensorflow import keras
from tensorflow.keras import layers
import tensorflow as tf
from keras.utils import to_categorical
from keras.models import load_model
import plotly.graph_objects as go
from tensorflow.keras.models import Sequential
from keras.callbacks import ModelCheckpoint
from tensorflow.keras.layers import Dense
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, classification_report, precision_recall_curve, ConfusionMatrixDisplay
The goal of this lab is to work through a common practice of Deep Learning Engineers - that is - take an existing model that does something similar to what the engineer is interested in doing, and fine-tune it for the specific task at hand.
For this report, we will be downloading the data from Kaggle.
data_folder = pathlib.Path('../../CSCN8010-Foundations-of-Machine-Learning/data/dogs-vs-cats-small')
We will use the below function and code to get the image datasets from the folder directory.
def _load_split(split_name):
    """Load one dataset split ('train', 'validation' or 'test') as a batched image dataset.

    All splits share the same settings: images resized to 180x180 (the input
    size the model below expects) and batches of 32.
    """
    return image_dataset_from_directory(
        data_folder / split_name,
        image_size=(180, 180),
        batch_size=32)

train_dataset = _load_split("train")
validation_dataset = _load_split("validation")
test_dataset = _load_split("test")
Found 2000 files belonging to 2 classes. Found 1000 files belonging to 2 classes. Found 2000 files belonging to 2 classes.
test_dataset
<_BatchDataset element_spec=(TensorSpec(shape=(None, 180, 180, 3), dtype=tf.float32, name=None), TensorSpec(shape=(None,), dtype=tf.int32, name=None))>
Then the function and the code in the block below will convert the datasets into features and labels that we can now interact with.
We will talk about the conv_base later. But for now, we need it to extract our features and labels.
# Convolutional base: VGG16 pretrained on ImageNet, with its dense classifier
# head removed (include_top=False) so it acts as a pure feature extractor.
# For 180x180x3 inputs it produces 5x5x512 feature maps (see shapes below).
# NOTE: the first call downloads the pretrained weights.
conv_base = keras.applications.vgg16.VGG16(
    weights="imagenet",
    include_top=False,
    input_shape=(180, 180, 3))
import numpy as np
def get_features_and_labels(dataset, model=None, preprocess=None):
    """Run every batch of `dataset` through a frozen feature extractor.

    Parameters
    ----------
    dataset : iterable of (images, labels) batch pairs (e.g. a tf.data.Dataset).
    model : object with a `.predict(x, verbose=...)` method; defaults to the
        module-level VGG16 `conv_base`.
    preprocess : callable applied to each image batch before prediction;
        defaults to VGG16's `preprocess_input`.

    Returns
    -------
    (features, labels) : two numpy arrays — the per-batch model outputs and
        labels, each concatenated along the first (sample) axis.
    """
    # Resolve defaults lazily so callers can substitute their own extractor.
    if model is None:
        model = conv_base
    if preprocess is None:
        preprocess = keras.applications.vgg16.preprocess_input
    all_features = []
    all_labels = []
    for images, labels in dataset:
        preprocessed_images = preprocess(images)
        # verbose=0 suppresses the per-batch Keras progress bars that
        # otherwise flood the notebook output.
        features = model.predict(preprocessed_images, verbose=0)
        all_features.append(features)
        all_labels.append(labels)
    return np.concatenate(all_features), np.concatenate(all_labels)
# Precompute the conv_base features once per split. Downstream, only a small
# classifier is trained on these cached features (fast feature extraction),
# so the expensive VGG16 forward pass runs a single time per image.
train_features, train_labels = get_features_and_labels(train_dataset)
val_features, val_labels = get_features_and_labels(validation_dataset)
test_features, test_labels = get_features_and_labels(test_dataset)
1/1 [==============================] - 1s 1s/step 1/1 [==============================] - 1s 1s/step 1/1 [==============================] - 1s 1s/step 1/1 [==============================] - 1s 1s/step 1/1 [==============================] - 1s 1s/step 1/1 [==============================] - 1s 1s/step 1/1 [==============================] - 1s 1s/step 1/1 [==============================] - 2s 2s/step 1/1 [==============================] - 2s 2s/step 1/1 [==============================] - 2s 2s/step 1/1 [==============================] - 2s 2s/step 1/1 [==============================] - 2s 2s/step 1/1 [==============================] - 1s 1s/step 1/1 [==============================] - 2s 2s/step 1/1 [==============================] - 1s 1s/step 1/1 [==============================] - 1s 1s/step 1/1 [==============================] - 2s 2s/step 1/1 [==============================] - 2s 2s/step 1/1 [==============================] - 1s 1s/step 1/1 [==============================] - 1s 1s/step 1/1 [==============================] - 2s 2s/step 1/1 [==============================] - 2s 2s/step 1/1 [==============================] - 1s 1s/step 1/1 [==============================] - 2s 2s/step 1/1 [==============================] - 1s 1s/step 1/1 [==============================] - 2s 2s/step 1/1 [==============================] - 2s 2s/step 1/1 [==============================] - 1s 1s/step 1/1 [==============================] - 2s 2s/step 1/1 [==============================] - 2s 2s/step 1/1 [==============================] - 1s 1s/step 1/1 [==============================] - 1s 1s/step 1/1 [==============================] - 2s 2s/step 1/1 [==============================] - 1s 1s/step 1/1 [==============================] - 1s 1s/step 1/1 [==============================] - 2s 2s/step 1/1 [==============================] - 2s 2s/step 1/1 [==============================] - 2s 2s/step 1/1 [==============================] - 2s 2s/step 1/1 [==============================] - 1s 1s/step 
1/1 [==============================] - 2s 2s/step 1/1 [==============================] - 2s 2s/step 1/1 [==============================] - 1s 1s/step 1/1 [==============================] - 2s 2s/step 1/1 [==============================] - 2s 2s/step 1/1 [==============================] - 1s 1s/step 1/1 [==============================] - 2s 2s/step 1/1 [==============================] - 2s 2s/step 1/1 [==============================] - 2s 2s/step 1/1 [==============================] - 1s 1s/step 1/1 [==============================] - 2s 2s/step 1/1 [==============================] - 1s 1s/step 1/1 [==============================] - 1s 1s/step 1/1 [==============================] - 2s 2s/step 1/1 [==============================] - 2s 2s/step 1/1 [==============================] - 1s 1s/step 1/1 [==============================] - 2s 2s/step 1/1 [==============================] - 1s 1s/step 1/1 [==============================] - 2s 2s/step 1/1 [==============================] - 1s 1s/step 1/1 [==============================] - 2s 2s/step 1/1 [==============================] - 1s 1s/step 1/1 [==============================] - 1s 877ms/step 1/1 [==============================] - 2s 2s/step 1/1 [==============================] - 1s 1s/step 1/1 [==============================] - 2s 2s/step 1/1 [==============================] - 2s 2s/step 1/1 [==============================] - 2s 2s/step 1/1 [==============================] - 1s 1s/step 1/1 [==============================] - 2s 2s/step 1/1 [==============================] - 2s 2s/step 1/1 [==============================] - 1s 1s/step 1/1 [==============================] - 2s 2s/step 1/1 [==============================] - 1s 1s/step 1/1 [==============================] - 2s 2s/step 1/1 [==============================] - 2s 2s/step 1/1 [==============================] - 2s 2s/step 1/1 [==============================] - 2s 2s/step 1/1 [==============================] - 1s 1s/step 1/1 [==============================] - 2s 
2s/step 1/1 [==============================] - 2s 2s/step 1/1 [==============================] - 1s 1s/step 1/1 [==============================] - 1s 1s/step 1/1 [==============================] - 1s 1s/step 1/1 [==============================] - 2s 2s/step 1/1 [==============================] - 2s 2s/step 1/1 [==============================] - 2s 2s/step 1/1 [==============================] - 2s 2s/step 1/1 [==============================] - 2s 2s/step 1/1 [==============================] - 2s 2s/step 1/1 [==============================] - 2s 2s/step 1/1 [==============================] - 2s 2s/step 1/1 [==============================] - 2s 2s/step 1/1 [==============================] - 3s 3s/step 1/1 [==============================] - 1s 790ms/step 1/1 [==============================] - 3s 3s/step 1/1 [==============================] - 3s 3s/step 1/1 [==============================] - 3s 3s/step 1/1 [==============================] - 4s 4s/step 1/1 [==============================] - 3s 3s/step 1/1 [==============================] - 3s 3s/step 1/1 [==============================] - 3s 3s/step 1/1 [==============================] - 3s 3s/step 1/1 [==============================] - 3s 3s/step 1/1 [==============================] - 3s 3s/step 1/1 [==============================] - 4s 4s/step 1/1 [==============================] - 3s 3s/step 1/1 [==============================] - 3s 3s/step 1/1 [==============================] - 3s 3s/step 1/1 [==============================] - 3s 3s/step 1/1 [==============================] - 3s 3s/step 1/1 [==============================] - 3s 3s/step 1/1 [==============================] - 3s 3s/step 1/1 [==============================] - 3s 3s/step 1/1 [==============================] - 3s 3s/step 1/1 [==============================] - 3s 3s/step 1/1 [==============================] - 3s 3s/step 1/1 [==============================] - 3s 3s/step 1/1 [==============================] - 4s 4s/step 1/1 [==============================] - 
3s 3s/step 1/1 [==============================] - 3s 3s/step 1/1 [==============================] - 3s 3s/step 1/1 [==============================] - 3s 3s/step 1/1 [==============================] - 3s 3s/step 1/1 [==============================] - 3s 3s/step 1/1 [==============================] - 3s 3s/step 1/1 [==============================] - 3s 3s/step 1/1 [==============================] - 3s 3s/step 1/1 [==============================] - 3s 3s/step 1/1 [==============================] - 4s 4s/step 1/1 [==============================] - 3s 3s/step 1/1 [==============================] - 3s 3s/step 1/1 [==============================] - 3s 3s/step 1/1 [==============================] - 3s 3s/step 1/1 [==============================] - 3s 3s/step 1/1 [==============================] - 3s 3s/step 1/1 [==============================] - 3s 3s/step 1/1 [==============================] - 3s 3s/step 1/1 [==============================] - 3s 3s/step 1/1 [==============================] - 3s 3s/step 1/1 [==============================] - 3s 3s/step 1/1 [==============================] - 3s 3s/step 1/1 [==============================] - 4s 4s/step 1/1 [==============================] - 3s 3s/step 1/1 [==============================] - 3s 3s/step 1/1 [==============================] - 3s 3s/step 1/1 [==============================] - 3s 3s/step 1/1 [==============================] - 3s 3s/step 1/1 [==============================] - 3s 3s/step 1/1 [==============================] - 4s 4s/step 1/1 [==============================] - 3s 3s/step 1/1 [==============================] - 3s 3s/step 1/1 [==============================] - 3s 3s/step 1/1 [==============================] - 2s 2s/step 1/1 [==============================] - 2s 2s/step 1/1 [==============================] - 2s 2s/step 1/1 [==============================] - 2s 2s/step 1/1 [==============================] - 2s 2s/step
train_features.shape, val_features.shape, test_features.shape
((2000, 5, 5, 512), (1000, 5, 5, 512), (2000, 5, 5, 512))
train_labels
array([1, 0, 1, ..., 1, 0, 1])
Let us first put our data into a usable format we can explore.
def create_image_dataframe(base_folder, dataset_type, labels=('cat', 'dog'), pattern='*.jpg'):
    """Build a DataFrame listing every image file in one dataset split.

    Parameters
    ----------
    base_folder : pathlib.Path - dataset root (contains 'train'/'validation'/'test').
    dataset_type : str - which split folder to scan, e.g. 'train'.
    labels : sequence of str - class subfolder names to scan (defaults keep the
        original cat/dog behavior).
    pattern : str - glob pattern for image files within each class folder.

    Returns
    -------
    pd.DataFrame with columns:
        ImagePath - split-relative path, e.g. 'train/cat/cat.0.jpg'
        ImageName - file name without extension
        Label     - the class subfolder name
    """
    data = []
    dataset_folder = base_folder / dataset_type
    for label in labels:
        full_subfolder_path = dataset_folder / label
        # sorted() makes row order deterministic; Path.glob order is
        # filesystem-dependent otherwise.
        for image_path in sorted(full_subfolder_path.glob(pattern)):
            data.append({
                'ImagePath': f"{dataset_type}/{label}/{image_path.name}",
                'ImageName': image_path.stem,
                'Label': label,
            })
    return pd.DataFrame(data)
# Define the base path for the data
data_folder = Path('../../CSCN8010-Foundations-of-Machine-Learning/data/dogs-vs-cats-small')
# Create DataFrames for train, validation, and test sets
df_train = create_image_dataframe(data_folder, 'train')
df_validation = create_image_dataframe(data_folder, 'validation')
df_test = create_image_dataframe(data_folder, 'test')
# Display the first few rows of each DataFrame as a sanity check
# Sanity check: show the first few rows of each split's DataFrame.
for heading, frame in (("Training Set:", df_train),
                       ("\nValidation Set:", df_validation),
                       ("\nTest Set:", df_test)):
    print(heading)
    display(frame.head())
Training Set:
| ImagePath | ImageName | Label | |
|---|---|---|---|
| 0 | train/cat/cat.0.jpg | cat.0 | cat |
| 1 | train/cat/cat.1.jpg | cat.1 | cat |
| 2 | train/cat/cat.10.jpg | cat.10 | cat |
| 3 | train/cat/cat.100.jpg | cat.100 | cat |
| 4 | train/cat/cat.101.jpg | cat.101 | cat |
Validation Set:
| ImagePath | ImageName | Label | |
|---|---|---|---|
| 0 | validation/cat/cat.1000.jpg | cat.1000 | cat |
| 1 | validation/cat/cat.1001.jpg | cat.1001 | cat |
| 2 | validation/cat/cat.1002.jpg | cat.1002 | cat |
| 3 | validation/cat/cat.1003.jpg | cat.1003 | cat |
| 4 | validation/cat/cat.1004.jpg | cat.1004 | cat |
Test Set:
| ImagePath | ImageName | Label | |
|---|---|---|---|
| 0 | test/cat/cat.1500.jpg | cat.1500 | cat |
| 1 | test/cat/cat.1501.jpg | cat.1501 | cat |
| 2 | test/cat/cat.1502.jpg | cat.1502 | cat |
| 3 | test/cat/cat.1503.jpg | cat.1503 | cat |
| 4 | test/cat/cat.1504.jpg | cat.1504 | cat |
# Tag each DataFrame with its split name so provenance survives the merge,
# then stack all three into a single frame with a fresh index.
for frame, split_name in ((df_train, 'Train'),
                          (df_validation, 'Validation'),
                          (df_test, 'Test')):
    frame['Set'] = split_name
df_merged = pd.concat([df_train, df_validation, df_test], ignore_index=True)
from PIL import Image
def plot_cat_images(df, base_folder, total_images=20, images_per_row=5, label='cat'):
    """Display a grid of randomly sampled images of one class.

    Parameters
    ----------
    df : DataFrame with 'Label' and 'ImagePath' columns.
    base_folder : pathlib.Path that the relative ImagePath values are joined to.
    total_images : how many images to sample. Sampling is with replacement,
        so the same image may appear more than once.
    images_per_row : grid width.
    label : class to sample; default 'cat' preserves the original behavior.
    """
    subset = df[df['Label'] == label]
    # Grid height is loop-invariant: ceil(total_images / images_per_row).
    total_rows = total_images // images_per_row + int(total_images % images_per_row != 0)
    plt.figure(figsize=(40, 40))
    for i in range(total_images):
        random_row = subset.sample(n=1).iloc[0]
        image_path = base_folder / random_row['ImagePath']
        # Context manager ensures the underlying file handle is closed;
        # imshow copies the pixel data, so closing afterwards is safe.
        with Image.open(image_path) as image:
            plt.subplot(total_rows, images_per_row, i + 1)
            plt.imshow(image)
            plt.axis('off')
    plt.subplots_adjust(wspace=0, hspace=0)
    plt.tight_layout(pad=0)
    plt.show()
plot_cat_images(df_merged, data_folder)